import pandas as pd
from datasets import Dataset

# Convert an Excel sheet of reports into a Hugging Face dataset on disk.
# Each row gets a `text` column formed by joining the '描述' (description)
# and '诊断' (diagnosis) columns with a single space.
src_path = '/share_data/mmd/dcm/CT出血-100例+报告/baogao.xlsx'
dst_path = '/share_data/liupan/med_corpus'
df = pd.read_excel(src_path)

# Blank Excel cells are read as NaN, and `NaN + str` stays NaN, so a row
# missing either field would otherwise end up with a null `text` and lose
# the field that *is* present. Fill blanks with '' and cast to str (cells
# stored as numbers would make the concatenation raise) before joining.
description = df['描述'].fillna('').astype(str)
diagnosis = df['诊断'].fillna('').astype(str)

# Strip so that an empty side does not leave a dangling separator space.
df['text'] = (description + ' ' + diagnosis).str.strip()

# The original columns are kept alongside `text` in the saved dataset.
dset = Dataset.from_pandas(df)
dset.save_to_disk(dst_path)